import docx
from pathlib import Path
import glob
from docx.oxml.ns import qn


def one_html_to_docx(txt_file, to_docx_file):
    """Convert one plain-text file into a .docx holding a single paragraph.

    The file is decoded by trying several encodings in turn; the first one
    that decodes without error is used. The whole text is placed in one run
    of one paragraph, with the font set to 宋体 (SimSun).

    Failures are reported with ``print`` instead of being raised, so a batch
    caller can carry on with the remaining files.

    Args:
        txt_file: Path of the text file to read.
        to_docx_file: Path of the .docx file to write.
    """
    def read_fun(encoding):
        # Return the decoded content, or None when the file cannot be opened
        # or its bytes are not valid in this encoding.
        try:
            with open(txt_file, 'r', encoding=encoding) as f:
                return f.read()
        except (OSError, UnicodeError):
            return None

    # Try the strict encoding first: many UTF-8 byte sequences are also valid
    # GBK, so trying GBK first can "succeed" with garbled text. 'utf-8-sig'
    # additionally drops a leading BOM. gb18030 is the last resort because it
    # is a superset of GBK/GB2312.
    # If the encoding of the input is known, pass just that one instead.
    for encoding in ('utf-8-sig', 'gbk', 'gb18030'):
        txt = read_fun(encoding)
        # Compare against None so an empty file still counts as readable.
        if txt is not None:
            break
    else:
        print(f'读{txt_file}失败')
        return

    try:
        document = docx.Document()
        document.styles['Normal'].font.name = '宋体'
        # font.name only sets the Latin font slots; the East Asian slot has
        # to be set on the underlying XML for CJK text to use the font.
        document.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')
        paragraph = document.add_paragraph()
        run = paragraph.add_run(txt)
        run.font.name = '宋体'
        document.save(to_docx_file)
        print(txt_file)
    except Exception as e:
        # Best-effort batch conversion: report and let the caller continue.
        print(f"转换{txt_file}出错，", e)


def all_txt_to_docx(txt_dir, docs_dir):
    """Convert every ``.txt`` file under ``txt_dir`` into a ``.docx`` under ``docs_dir``.

    The directory layout below ``txt_dir`` is mirrored below ``docs_dir``.
    A text file whose target ``.docx`` already exists is skipped, so the
    function can be re-run to pick up where a previous run stopped.

    Args:
        txt_dir: Root directory searched recursively for ``*.txt`` files.
        docs_dir: Root directory that receives the generated ``.docx`` files.
    """
    src_root = Path(txt_dir)
    dst_root = Path(docs_dir)

    # Build the pattern with Path so the separator matches the platform, and
    # escape the root so glob metacharacters in it are taken literally.
    pattern = str(Path(glob.escape(str(src_root)), "**", "*.txt"))

    for html_file in glob.glob(pattern, recursive=True):
        # Swap only the leading root and the final suffix; plain str.replace
        # would also rewrite matching text inside directory or file names.
        relative = Path(html_file).relative_to(src_root)
        to_docx_file = (dst_root / relative).with_suffix(".docx")
        if to_docx_file.exists():
            continue
        # Saving fails if the mirrored sub-directory does not exist yet.
        to_docx_file.parent.mkdir(parents=True, exist_ok=True)
        one_html_to_docx(html_file, str(to_docx_file))


if __name__ == '__main__':
    # Script entry point: the input tree of .txt files and the output tree
    # for the generated .docx files are hard-coded here.
    txt_dir, docs_dir = r"D:\dl\txts", r"D:\dl\docs"
    all_txt_to_docx(txt_dir, docs_dir)
